library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
time_series_confirmed_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region")  %>% 
               pivot_longer(-c(Province_State, Country_Region, Lat, Long),
                             names_to = "Date", values_to = "Confirmed") 
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Let's get the times series data for deaths
time_series_deaths_long <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")) %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region")  %>% 
  pivot_longer(-c(Province_State, Country_Region, Lat, Long),
               names_to = "Date", values_to = "Deaths")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Create Keys 
time_series_confirmed_long <- time_series_confirmed_long %>% 
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)
time_series_deaths_long <- time_series_deaths_long %>% 
  unite(Key, Province_State, Country_Region, Date, sep = ".") %>% 
  select(Key, Deaths)

# Join tables
time_series_long_joined <- full_join(time_series_confirmed_long,
    time_series_deaths_long, by = c("Key")) %>% 
    select(-Key)

# Reformat the data
time_series_long_joined$Date <- mdy(time_series_long_joined$Date)

# Create Report table with counts
time_series_long_joined_counts <- time_series_long_joined %>% 
  pivot_longer(-c(Province_State, Country_Region, Lat, Long, Date),
               names_to = "Report_Type", values_to = "Counts")
# Plot graph to a pdf outputfile
pdf("images/time_series_example_plot.pdf", width=6, height=3)
time_series_long_joined %>% 
  group_by(Country_Region,Date) %>% 
  summarise_at(c("Confirmed", "Deaths"), sum) %>% 
  filter (Country_Region == "US") %>% 
    ggplot(aes(x = Date,  y = Deaths)) + 
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Deaths")
dev.off
## function (which = dev.cur()) 
## {
##     if (which == 1) 
##         stop("cannot shut down device 1 (the null device)")
##     .External(C_devoff, as.integer(which))
##     dev.cur()
## }
## <bytecode: 0x2a0bab0>
## <environment: namespace:grDevices>
# Version 2
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ggplotly(
  time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region == "US") %>% 
    ggplot(aes(x = Date,  y = Deaths)) + 
      geom_point() +
      geom_line() +
      ggtitle("US COVID-19 Deaths")
 )
library(plotly)
# Subset the time series data to include US deaths
US_deaths <- time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region == "US")
# Collect the layers for agraph of the US time series data for covid deaths
 p <- ggplot(data = US_deaths, aes(x = Date,  y = Deaths)) + 
        geom_point() +
        geom_line() +
        ggtitle("US COVID-19 Deaths")
# Plot the graph using ggplotly
ggplotly(p)
library(gganimate)
library(transformr)
theme_set(theme_bw())

## An animation of the confirmed cases in select countries

data_time <- time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region %in% c("China","Korea, South","Japan","Italy","US")) 
p <- ggplot(data_time, aes(x = Date,  y = Confirmed, color = Country_Region)) + 
      geom_point() +
      geom_line() +
      ggtitle("Confirmed COVID-19 Cases") +
      geom_point(aes(group = seq_along(Date))) +
      transition_reveal(Date) 
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
animate(p, end_pause = 15)

###Exercise activities ###Challenge 1

##Print a graph (different from the one above) to a png file using 3*ppi for the height and width and display the png file in the report using the above R Markdown format.

Confirmed_State_6_13<-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/06-13-2020.csv")) %>%
  filter (Country_Region == "US") %>% 
  group_by(Province_State, Country_Region) %>% 
  summarise(Confirmed = sum(Confirmed)) 
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
Confirmed_State_9_13 <-   read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-13-2020.csv")) %>% 
  filter (Country_Region == "US") %>% 
  group_by(Province_State, Country_Region) %>% 
  summarise(Confirmed = sum(Confirmed)) 
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
Confirmed_State_9_13 <- Confirmed_State_9_13 %>% 
  filter(Province_State != "Recovered") 
Confirmed_State_6_13_9_13_joined <- full_join(Confirmed_State_6_13,
      Confirmed_State_9_13, by = c("Province_State"))
head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Alabama        US                     24601 US                    138755
## 2 Alaska         US                       653 US                      6268
## 3 Arizona        US                     34660 US                    208512
## 4 Arkansas       US                     12095 US                     70219
## 5 California     US                    150018 US                    761728
## 6 Colorado       US                     29017 US                     61313
Confirmed_State_6_13_9_13_joined <- full_join(Confirmed_State_6_13,
      Confirmed_State_9_13, by = c("Province_State")) %>% 
      rename(Confirmed_6_13_2020 = "Confirmed.x", Confirmed_9_13_2020 = "Confirmed.y") %>% 
      select(-Country_Region.x, -Country_Region.y) %>% 
      replace_na(list(Confirmed_6_13_2020 = 0))
head(Confirmed_State_6_13_9_13_joined)
## # A tibble: 6 x 3
## # Groups:   Province_State [6]
##   Province_State Confirmed_6_13_2020 Confirmed_9_13_2020
##   <chr>                        <dbl>               <dbl>
## 1 Alabama                      24601              138755
## 2 Alaska                         653                6268
## 3 Arizona                      34660              208512
## 4 Arkansas                     12095               70219
## 5 California                  150018              761728
## 6 Colorado                     29017               61313
# which(is.na(Confirmed_State_6_13_9_13_joined))
Confirmed_State_6_13_9_13_joined_long <- Confirmed_State_6_13_9_13_joined %>% 
              pivot_longer(-c(Province_State),
                            names_to = "Date", values_to = "Confirmed")
ppi <- 100
png("us_confirmed_covid_cases_State_6_13_9_13.png", width=6*ppi, height=8*ppi, res=ppi)
ggplot(Confirmed_State_6_13_9_13_joined_long, aes(x = Confirmed,  y = Province_State))  + geom_bar(stat="identity",aes(color = Date)) +  labs(title="COVID-19 Confirmed Cases in US",
        x ="Number of Confirmed Cases", y = "State/Province in US")
## Warning: Removed 1 rows containing missing values (position_stack).
dev.off()
## png 
##   2

###Challenge 2

##Turn one of the exercises from Lab 5 into an interactive graph with plotyly

library(plotly)

ggplotly( ggplot(Confirmed_State_6_13_9_13_joined_long, aes(x = Confirmed,  y = Province_State))  + 
    geom_point(aes(color = Date)) )

###Challenge3

## Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.

library(gganimate)
library(transformr)

time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region %in% c("Brazil","UK","Italy", 
                                "Mexico", "US","Spain","France","Russia","Germany","China")) %>% 
    ggplot(aes(x = Date,  y = Deaths, color = Country_Region)) + 
    geom_point() +
    geom_line() +
    ggtitle("COVID-19 Deaths")

data_time <- time_series_long_joined %>% 
    group_by(Country_Region,Date) %>% 
    summarise_at(c("Confirmed", "Deaths"), sum) %>% 
    filter (Country_Region %in% c("China","Korea, South","Japan","Italy","US")) 
p <- ggplot(data_time, aes(x = Date,  y = Confirmed, color = Country_Region)) + 
      geom_point() +
      geom_line() +
      ggtitle("Confirmed COVID-19 Cases") +
      geom_point(aes(group = seq_along(Date))) +
      transition_reveal(Date) 
# Some people needed to use this line instead
# animate(p,renderer = gifski_renderer(), end_pause = 15)
animate(p, end_pause = 15)